

##################################################
## Human Trafficking Indicators: A New Dataset ##
## Author: Richard W Frank                     ##
## Journal: International Interactions         ##
## Code written: April 14, 2021                ##
## Software: rstan ver. 2.15.1, R ver. 3.3.1   ##
## Purpose: DYNAMIC ANALYSIS AND PLOTS          ## 
#################################################
 
library(rstan)
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)
library(ggplot2)
library(rstanarm)
library(bayesplot)
library(loo)
rm(list=ls())
library(dplyr)

setwd("~")
load("hti_dynamic.RData")

output <- extract(fit, permuted = TRUE)

### GENERATING LL AND PAR DATA ###
 
var.names <- names(output)
ll <- extract_log_lik(fit, grep("ll_", var.names, value=T))
write.csv(ll, file=paste(fit, "dynamic_ll.csv", sep="_"))
post.var <- as.matrix(fit, par=var.names[!grepl("ll_", var.names)])
write.csv(post.var, file=paste(fit, "dynamic_par.csv", sep="_"), row.names=F)

######################
## extracting betas ##
######################

## this seems the easiest way to do it for my limited skills 
## I save the betas and generate percentiles for graphs and tables
## in stata

dest_beta<-output$beta_dest
write.csv(dest_beta, "dynamic_dest_beta.csv", row.names=TRUE)

ldest_beta<-output$beta_ldest
write.csv(ldest_beta, "dynamic_ldest_beta.csv", row.names=TRUE)

pdest_beta<-output$beta_pdest
write.csv(pdest_beta, "dynamic_pdest_beta.csv", row.names=TRUE)

ddest_beta<-output$beta_ddest
write.csv(ddest_beta, "dynamic_ddest_beta.csv", row.names=TRUE)

dsdest_beta<-output$beta_dsdest
write.csv(dsdest_beta, "dynamic_dsdest_beta.csv", row.names=TRUE)

cldest_beta<-output$beta_cldest
write.csv(cldest_beta, "dynamic_cldest_beta.csv", row.names=TRUE)

cpdest_beta<-output$beta_cpdest
write.csv(cpdest_beta, "dynamic_cpdest_beta.csv", row.names=TRUE)

beta5 = cbind(apply(output$beta_events_5,2,mean),apply(output$beta_events_5,2,quantile,.025),apply(output$beta_events_5,2,quantile,.975))
rownames(beta5)<-c("u_vict_5", 
                   "c_sex_5", 
                   "c_labor_5", 
                   "c_vic_5")
write.csv(beta5,  "dynamic_betas_5.csv", row.names=TRUE)

######################
## extracting thetas #

theta_mean <- apply(output$theta, 2, mean)
theta_sd <- apply(output$theta, 2, sd)

data$theta_mean <- NA
data$theta_sd <- NA
for(ii in 1:nrow(data)){
  data$theta_mean[ii] <- theta_mean[id[ii]]
  data$theta_sd[ii] <- theta_sd[id[ii]]
}

write.csv(dplyr::select(data,ccode,year,theta_mean,theta_sd),
          file="hti_dynamic_thetas.csv", sep="_",na="",row.names=F)

####################
### SAVING ALPHAS ##
#### Difficulty ####

dest_alphas = cbind(apply(output$cut_dest,2,mean),apply(output$cut_dest,2,sd),apply(output$cut_dest, 2, quantile, 0.025 ), apply(output$cut_dest, 2, quantile, 0.975 ))
write.csv(dest_alphas, "dynamic_dest_alphas.csv", row.names=TRUE)

ldest_alphas = cbind(apply(output$cut_ldest,2,mean),apply(output$cut_ldest,2,sd),apply(output$cut_ldest, 2, quantile, 0.025 ), apply(output$cut_ldest, 2, quantile, 0.975 ))
write.csv(ldest_alphas, "dynamic_ldest_alphas.csv", row.names=TRUE)

pdest_alphas = cbind(apply(output$cut_pdest,2,mean),apply(output$cut_pdest,2,sd),apply(output$cut_pdest, 2, quantile, 0.025 ), apply(output$cut_pdest, 2, quantile, 0.975 ))
write.csv(pdest_alphas, "dynamic_pdest_alphas.csv", row.names=TRUE)

ddest_alphas = cbind(apply(output$cut_ddest,2,mean),apply(output$cut_ddest,2,sd),apply(output$cut_ddest, 2, quantile, 0.025 ), apply(output$cut_ddest, 2, quantile, 0.975 ))
write.csv(ddest_alphas, "dynamic_ddest_alphas.csv", row.names=TRUE)

dsdest_alphas = cbind(apply(output$cut_dsdest,2,mean),apply(output$cut_dsdest,2,sd),apply(output$cut_dsdest, 2, quantile, 0.025 ), apply(output$cut_dsdest, 2, quantile, 0.975 ))
write.csv(dsdest_alphas, "dynamic_dsdest_alphas.csv", row.names=TRUE)

cldest_alphas = cbind(apply(output$cut_cldest,2,mean),apply(output$cut_cldest,2,sd),apply(output$cut_cldest, 2, quantile, 0.025 ), apply(output$cut_cldest, 2, quantile, 0.975 ))
write.csv(cldest_alphas, "dynamic_cldest_alphas.csv", row.names=TRUE)

cpdest_alphas = cbind(apply(output$cut_cpdest,2,mean),apply(output$cut_cpdest,2,sd),apply(output$cut_cpdest, 2, quantile, 0.025 ), apply(output$cut_cpdest, 2, quantile, 0.975 ))
write.csv(cpdest_alphas, "dynamic_cpdest_alphas.csv", row.names=TRUE)

dynamic_event_alphas = cbind(apply(output$cut_events_5[,,1],2,mean),apply(output$cut_events_5[,,1],2,sd),apply(output$cut_events_5[,,1], 2, quantile, 0.975 ),apply(output$cut_events_5[,,1], 2, quantile, 0.025),
                             apply(output$cut_events_5[,,2],2,mean),apply(output$cut_events_5[,,2],2,sd),apply(output$cut_events_5[,,2], 2, quantile, 0.975 ),apply(output$cut_events_5[,,2], 2, quantile, 0.025),
                             apply(output$cut_events_5[,,3],2,mean),apply(output$cut_events_5[,,3],2,sd),apply(output$cut_events_5[,,3], 2, quantile, 0.975 ),apply(output$cut_events_5[,,3], 2, quantile, 0.025),
                             apply(output$cut_events_5[,,4],2,mean),apply(output$cut_events_5[,,4],2,sd),apply(output$cut_events_5[,,4], 2, quantile, 0.975 ),apply(output$cut_events_5[,,4], 2, quantile, 0.025),                        
                             apply(output$beta_events_5,2,mean),apply(output$beta_events_5,2,sd),apply(output$beta_events_5, 2, quantile, 0.975 ),apply(output$beta_events_5, 2, quantile, 0.025 ) )
colnames(dynamic_event_alphas)<-c("c1_mean","c1_sd","c1_975","c1_025",
                                  "c2_mean","c2_sd","c2_975","c2_025",
                                  "c3_mean", "c3_sd", "c3_975", "c3_025",
                                  "c4_mean", "c4_sd", "c4_975", "c4_025",
                                  "b_mean","b_sd","b_975","b_025")
rownames(dynamic_event_alphas)<-c("u_vict_5", "c_sex_5",  "c_labor_5",  "c_vic_5")

## writing to csv to plot in Stata
write.csv(dynamic_event_alphas, "dynamic_event_alphas.csv", row.names=TRUE)

#############################
## Posterior distributions ##
plot_title <- ggtitle("Posterior distributions",
                      "with medians and 80% intervals")
mcmc_areas(fit,
           pars = c("beta_dest" , 
                    "beta_ldest", 
                    "beta_pdest", 
                    "beta_ddest", 
                    "beta_dsdest", 
                    "beta_cldest", 
                    "beta_cpdest"),
           prob = 0.8)  

##################################
## POSTERIOR PREDICTIVE CHECKS ###
fit %>%
  posterior_predict(draws = 500) %>%
  ppc_stat_grouped(y = dest$mpg,
                   group = mtcars$carb,
                   stat = "median")

## posterior distributions graph
plot_title <- ggtitle("Posterior distributions",
                      "with medians and 80% intervals")



##############
## RHATS ##

## Rhat is a convergence diagnostic which compares parameter estimates across the chains. 
## FROM: https://blog.methodsconsultants.com/posts/introduction-to-stan-in-r/
## If the chains have converged and mixed well, then the Rhat value should be near 1. 
## If the chains have not converged to the same value, then the Rhat value will be larger than 1. 
## Rhat values of 1.05 or higher suggest a convergence issue. 

rhats <- rhat(fit)
write.csv(rhats, "dynamic_rhats.csv", row.names=TRUE)

## 1. GELMAN-RUBIN Criterion for convergence
library(coda)
gelman.diag(output)


## 2. Monte Carlo Error
## A rule of thumb is that the Monte Carlo error should not be more than 5% of the standard deviation2
## Lesaffre, E. M. and A. B. Lawson (2012). Bayesian Biostatistics. John Wiley & Sons. doi: 10.1002/9781119942412

MC_error(fit)

###################
## 3. Traceplots ##

## the thing to look for here is visualizing the posterior sample 
## Useful example: https://cran.r-project.org/web/packages/JointAI/vignettes/AfterFitting.html  
## When the sampler has converged the chains show one horizontal band.
## These trace plots would then suggest that both models have converged. 
## For all parameters, the four chains have mixed and there are no clear trends.

mcmc_trace(fit, regex_pars = "beta")
mcmc_trace(fit, regex_pars = "cut_dest")


##
lp <- log_posterior(fit)
np <- nuts_params(fit)
posterior <- as.array(fit)


######################
## 4. Density plots ##
library(coda)
densplot(output, start=1000, ncol = 3, col = c("darkred", "darkblue", "darkgreen"),
         vlines = list(list(v = c(rep(0, length(coef(fit) )), NA),
                            col = grey(0.8))))


################################
## EFFECTIVE SAMPLE SIZE NEFF ##
## For Figure G2

neff=summary(fit)$summary[,'n_eff']
neff_ratio<-neff_ratio(fit)
mcmc_neff(neff_ratio, size = 2)
write.csv(neff_ratio, "dynamic_neff_ratio.csv", row.names=TRUE) 

 

## Posterior predictive checks ##
## seeing in histograms whether predicted data from theta estimates match with the observed data.
pp_check(fit, plotfun = "dens_overlay", nreps = 10)

color_scheme_set("darkgray")
pp_check(fit, type = "overlaid")
pp_check(fit, type='stat', stat='mean')